
### Load data & packages --------
library(sna)
library(dplyr)
library(stargazer)
#install.packages("plm")
library("lmtest")
library("sandwich")
library("plm")
library(stringr)
library(rlang)
library(car)
library(ic.infer)
library(multiwayvcov)
library("pscl")
library(mlogit)
library("margins")

## Function for obtaining robust standard errors (Necessary)
robust_se <- function(some_reg, some_method) {
  coeftest(some_reg, vcov = vcovHC(some_reg, type = some_method))
} 

## Function for clustered standard errors (Necessary)
cluster_se <- function(some_reg, cluster_var) {
  
  vcov_var <- cluster.vcov(some_reg, formula(paste("~", cluster_var)))
  results <- coeftest(some_reg, vcov_var)
  return(results)
} 

## Function to obtain adjusted R2 from robust SE (Necessary) 
get_adj_r <- function(some_regression){
  round(summary(some_regression)$adj.r.squared, 3)
}

## Function to obtain sample size from robust SE (Necessary)
get_obs <- function(some_reg) {
  length(summary(some_reg)$residuals)
}

## General regressions (Necessary)
general_reg <- function(dep_var, race_var, grade_var) {
  # dep_var = "ssi_same_negro_na_2_std"
  # race_var = "factor(negro_na)"
  # grade_var = "grades_score"
  
  ind_controls <- paste("male", "factor(religion_simple)", 
                        "age_grade_distortion", "score_self_esteem", 
                        "score_parents_support", "score_study", sep = "+")
  
  ses_controls <- paste("scores_poverty", "score_neighborhood_quality", 
                        "factor(occ_father_simple_no_job_no_na)",
                        "factor(occ_mother_simple_no_job_no_na)", sep = "+")
  
  class_fe <- paste("factor(class_id)")
  
  # basic regression
  basic_reg <- paste0(dep_var, "~", paste0(race_var,"*", grade_var))
  
  Form_basic <- formula(basic_reg)
  
  # Individual control
  Form_ind <- formula(paste(basic_reg, ind_controls, sep = "+"))
  
  # SES control
  Form_ses <- formula(paste(basic_reg, ses_controls, sep = "+"))
  
  # Individual + SES control
  Form_ind_ses <- formula(paste(basic_reg, ind_controls, 
                                ses_controls, sep = "+"))
  
  # Individual controls + Classroom FE
  Form_ind_class_fe <- formula(paste(basic_reg, ind_controls, 
                                     class_fe, sep = "+"))
  
  # SES controls + Classroom FE
  Form_ses_class_fe <- formula(paste(basic_reg, ses_controls, 
                                     class_fe, sep = "+"))
  
  # Ind. + SES controls + Classroom FE
  Form_ind_ses_class_fe <- formula(paste(basic_reg, ind_controls,
                                         ses_controls, class_fe, 
                                         sep = "+"))
  
  list_specifications <- c(Form_basic, Form_ind, Form_ses, Form_ind_ses,
                           Form_ind_class_fe, Form_ses_class_fe, 
                           Form_ind_ses_class_fe) 
  
  results <- lapply(list_specifications, function(x) (lm(x, data = data, subset = (race < 4))))
  results_r <- lapply(results, robust_se, "HC3")
  results_c <- lapply(results, cluster_se, "class_id")
  
  results_final <- list(results, results_r, results_c)  
  return(results_final)
}


## Function to extract information from Linear Hypothesis test: (Necessary!)
race_score_linear_test <- function(some_reg, race_inter, test_var) {
  # some_reg <- lm(negro_na ~ skin_color + dummy_skin*grades_score
  #  + masculino + factor(religion_simple)
  #  + age_grade_distortion
  #  + score_neighborhood_quality + score_self_esteem
  #  + score_parents_support + score_study,
  #  data = data, subset = (race < 4))
  # summary(some_reg)
  # test_var = "grades_score"
  # race_inter = "dummy_skin"
  
  teste_paste <- paste0(paste0(race_inter, ":", test_var, "+", test_var,  "= 0"))
  teste_results <- linearHypothesis(some_reg, c(teste_paste), vcov = hccm)
  return(c(teste_results$F[2], teste_results$`Pr(>F)`[2]))
}


general_reg_race <- function(dep_var, ind_vars) {
  
  # dep_var = "negro_na"
  # ind_vars <- c("skin_color", "grades_score")
  
  ind_controls <- paste("male", "factor(religion_simple)", "age_grade_distortion",
                        "score_self_esteem", "score_parents_support", "score_study", sep = "+")
  
  ses_controls <- paste("scores_poverty", "score_neighborhood_quality",
                        "factor(occ_mother_simple_no_job_no_na)",
                        "factor(occ_father_simple_no_job_no_na)", sep = "+")
  
  class_fe <- paste("factor(class_id)")
  
  supply_friends <- paste("supply_2_sd_same_race_fr_2")
  
  supply_class_friends <-  paste("n_black_second_4", "n_black_third_4", "n_black_fourth_4",
                                 "n_brown_first_4", "n_brown_second_4", "n_brown_third_4", 
                                 "n_brown_fourth_4", "n_white_first_4", "n_white_second_4",
                                 "n_white_third_4", "n_white_fourth_4", sep = "+") 
  
  basic_reg <- paste0(dep_var, "~", paste(ind_vars, collapse = "+"))
  
  # Basic Regression:
  Form_basic <- formula(basic_reg)
  
  # SES
  Form_ses <- formula(paste(basic_reg, ses_controls, sep = "+"))
  
  # SES + Individual controls
  Form_ses_indiv <- formula(paste(basic_reg, ses_controls, ind_controls, sep = "+"))
  
  # SES + Indiv + Class FE 
  Form_ses_indiv_class_fe <- formula(paste(basic_reg, ses_controls, 
                                           ind_controls, class_fe,
                                           sep = "+"))
  
  
  list_specifications <- c(Form_basic, Form_ses, Form_ses_indiv, 
                           Form_ses_indiv_class_fe) 
  
  results <- lapply(list_specifications, function(x) 
    (lm(x, data = data, subset = (race < 4))))
  
  results_rest <- lapply(list_specifications, function(x) 
    (lm(x, data = data, subset = (race < 4 & skin_color <=10 &
                                    skin_color >=3))))
  
  results_r <- lapply(results, robust_se, "HC3")
  results_rest_r <- lapply(results_rest, robust_se, "HC3")
  
  results_cluster <- lapply(results, cluster_se, "class_id")
  results_rest_cluster <- lapply(results_rest, cluster_se, "class_id")
  
  results_final <- list(results, results_r, results_cluster, 
                        results_rest, results_rest_r,
                        results_rest_cluster)  
  return(results_final)
  
}


regressions_1 <- function(dep_var, race_var, grade_var) {
  # dep_var = "ssi_same_negro_na_2_std"
  # race_var = "factor(negro_na)"
  # grade_var = "grades_score"
  
  # basic regression
  basic_reg <- paste0(dep_var, "~", paste0(race_var,"*", grade_var))
  
  Form_basic <- formula(basic_reg)
  
  # Individual control
  Form_ind <- formula(paste(basic_reg, ind_controls, sep = "+"))
  
  # SES control
  Form_ses <- formula(paste(basic_reg, ses_controls, sep = "+"))
  
  # Individual + SES control
  Form_ind_ses <- formula(paste(basic_reg, ind_controls, 
                                ses_controls, sep = "+"))
  
  # Individual controls + Classroom FE
  Form_ind_class_fe <- formula(paste(basic_reg, ind_controls, 
                                     class_fe, sep = "+"))
  
  # SES controls + Classroom FE
  Form_ses_class_fe <- formula(paste(basic_reg, ses_controls, 
                                     class_fe, sep = "+"))
  
  # Ind. + SES controls + Classroom FE
  Form_ind_ses_class_fe <- formula(paste(basic_reg, ind_controls,
                                         ses_controls, class_fe, 
                                         sep = "+"))
  
  list_specifications <- c(Form_basic, Form_ind, Form_ses, Form_ind_ses,
                           Form_ind_class_fe, Form_ses_class_fe, 
                           Form_ind_ses_class_fe) 
  
  results <- lapply(list_specifications, function(x) (lm(x, data = data, subset = (race < 4))))
  results_r <- lapply(results, robust_se, "HC3")
  results_c <- lapply(results, cluster_se, "class_id")
  
  results_final <- list(results, results_r, results_c)  
  return(results_final)
}
